1 Wstęp

Poniższy raport ma na celu analizę danych pochodzących z bazy Protein Data Bank (PDB). Dane zawierają informacje na temat ligandów. Zbiór danych zawiera między innymi nazwę danej cząsteczki chemicznej, liczbę atomów i elektronów oraz inne kolumny oparte o trójwymiarowy fragment gęstości elektronowej struktury. Przy analizie pominięte zostały kolumny utworzone przy pomocy wartości słownikowych. Ze względu na problem ze środowiskiem, zbiór początkowy ograniczono do 100 000 wierszy.

2 Użyte biblioteki

library(EDAWR)
library(dplyr)
library(DT)
library(ggplot2)
library(plotly)
library(reshape2)
library(cowplot)
library(data.table)
library(qwraps2)
library(fastDummies)
library(reshape2)
library(caret)
library(kableExtra)
library(pROC)

3 Powtarzalność wyników.

# Fix the random number generator seed so that repeated runs of the report
# draw the same random numbers.
set.seed(123)

4 Wczytywanie danych z pliku.

# Read a 100-row sample first so that fread() detects the type of each column,
# then pass the detected types to the full 100 000-row read.
initial <- fread("all_summary.csv", nrows = 100)

# Keep only the first class of every column. vapply() always yields a named
# character vector; sapply() silently returns a list as soon as one column
# carries more than one class, which would change the meaning of colClasses.
colClass <- vapply(initial, function(column) class(column)[1L], character(1))

pdb_table <- fread("all_summary.csv", nrows = 100000, colClasses = colClass)

5 Usuwanie wierszy z wybraną wartością res_name.

6 Przetwarzanie brakujących danych.

# Replace missing values in every numeric column with the mean of that column.
# Non-numeric columns (e.g. res_name) are skipped, because mean() is not
# defined for them and would only produce a warning and NA.
# Columns are accessed with [[ ]] so that the loop does not depend on how
# `[, i]` behaves for the particular table class, and iterating over names()
# is safe even when the table has no columns.
for (column_name in names(pdb_clear_res_name_table)) {
  column_values <- pdb_clear_res_name_table[[column_name]]
  if (is.numeric(column_values) && anyNA(column_values)) {
    column_values[is.na(column_values)] <- mean(column_values, na.rm = TRUE)
    pdb_clear_res_name_table[[column_name]] <- column_values
  }
}

Ze zbioru usunięte zostały wiersze, w których zmienna res_name jest równa jednej z wartości: UNK, UNX, UNL, DUM, N, BLOB, ALA, ARG, ASN, ASP, CYS, GLN, GLU, GLY, HIS, ILE, LEU, LYS, MET, MSE, PHE, PRO, SEC, SER, THR, TRP, TYR, VAL, DA, DG, DT, DC, DU, A, G, T, C, U, HOH, H20, WAT. Zbiór ograniczono do kolumn opisanych na stronie projektu; nie uwzględniono kolumn niewykorzystywanych do klasyfikacji, poza kolumnami res_name, local_res_atom_non_h_count, local_res_atom_non_h_electron_sum, dict_atom_non_h_count, dict_atom_non_h_electron_sum. Wartości NA zostały zastąpione średnią wartością danej kolumny.

7 Podsumowanie zbioru.

Zbiór przed wyczyszczeniem posiadał wymiar: 100000, 412 [wierszy, kolumn]. Po oczyszczeniu zbioru wymiary wynoszą: 98357, 336 [wierszy, kolumn].

res_name
Length:98357
Class :character
Mode :character
mean sd median min max class
local_res_atom_non_h_count 1.353000e+01 1.491000e+01 6.000000e+00 1.00 1.060000e+02 numeric
local_res_atom_non_h_electron_sum 1.006700e+02 1.014700e+02 5.100000e+01 3.00 1.848000e+03 numeric
dict_atom_non_h_count 1.382000e+01 1.530000e+01 6.000000e+00 1.00 1.060000e+02 numeric
dict_atom_non_h_electron_sum 1.027900e+02 1.030100e+02 5.200000e+01 3.00 8.580000e+02 numeric
local_volume 8.581200e+02 1.437090e+03 3.427200e+02 49.25 4.932517e+04 numeric
local_electrons 1.779000e+01 2.551000e+01 7.720000e+00 0.01 4.424400e+02 numeric
local_mean 2.000000e-02 2.000000e-02 2.000000e-02 0.00 3.700000e-01 numeric
local_std 1.200000e-01 9.000000e-02 1.000000e-01 0.00 1.960000e+00 numeric
local_min 0.000000e+00 0.000000e+00 0.000000e+00 0.00 0.000000e+00 numeric
local_max 1.340000e+00 1.580000e+00 8.800000e-01 0.03 4.344000e+01 numeric
local_skewness 2.200000e-01 1.900000e-01 1.700000e-01 0.01 3.810000e+00 numeric
part_00_shape_segments_count 3.407900e+02 1.140060e+03 2.500000e+01 0.00 1.145770e+05 numeric
part_00_density_segments_count 3.407900e+02 1.140060e+03 2.500000e+01 0.00 1.145770e+05 numeric
part_00_volume 3.316000e+01 5.181000e+01 1.415000e+01 0.00 2.427940e+03 numeric
part_00_electrons 1.759000e+01 2.537000e+01 7.650000e+00 0.00 4.411400e+02 numeric
part_00_mean 6.000000e-01 3.900000e-01 5.100000e-01 0.00 8.270000e+00 numeric
part_00_std 2.100000e-01 2.900000e-01 1.200000e-01 0.00 7.040000e+00 numeric
part_00_max 1.340000e+00 1.580000e+00 8.800000e-01 0.00 4.344000e+01 numeric
part_00_max_over_std 9.690000e+00 7.590000e+00 7.160000e+00 0.00 1.732500e+02 numeric
part_00_skewness 2.100000e-01 3.400000e-01 1.100000e-01 0.00 9.520000e+00 numeric
part_00_parts 1.070000e+00 3.800000e-01 1.000000e+00 0.00 2.800000e+01 numeric
part_00_shape_O3 1.710713e+06 8.395962e+06 9.596353e+04 121.39 1.361105e+09 numeric
part_00_shape_O4 1.358132e+13 1.404527e+15 2.274726e+09 3807.05 4.027551e+17 numeric
part_00_shape_O5 4.454644e+20 9.737382e+22 1.469504e+13 33777.03 2.908412e+25 numeric
part_00_shape_FL 6.482886e+16 1.471043e+19 3.709230e+10 218.65 4.575624e+21 numeric
part_00_shape_O3_norm 4.900000e-01 3.400000e-01 3.800000e-01 0.23 3.965000e+01 numeric
part_00_shape_O4_norm 6.000000e-02 8.000000e-02 3.000000e-02 0.02 6.010000e+00 numeric
part_00_shape_O5_norm 0.000000e+00 0.000000e+00 0.000000e+00 0.00 4.100000e-01 numeric
part_00_shape_FL_norm 5.000000e-02 2.800000e-01 1.000000e-02 0.00 3.855000e+01 numeric
part_00_shape_I1 2.400327e+09 6.217083e+10 7.711962e+06 528.82 1.276334e+13 numeric
part_00_shape_I2 3.127208e+20 8.195815e+22 8.713294e+12 56939.71 2.563219e+25 numeric
part_00_shape_I3 2.941910e+21 5.805190e+23 2.433944e+13 121692.16 1.625410e+26 numeric
part_00_shape_I4 4.140524e+16 8.903322e+18 2.033575e+10 118.50 2.754671e+21 numeric
part_00_shape_I5 2.578950e+16 5.065610e+18 6.160362e+09 14.36 1.540703e+21 numeric
part_00_shape_I6 2.429709e+17 3.499795e+19 3.395871e+11 28626.18 8.793191e+21 numeric
part_00_shape_I1_norm 5.700000e-01 8.930000e+00 2.300000e-01 0.06 2.760570e+03 numeric
part_00_shape_I2_norm 9.000000e-02 9.100000e-01 1.000000e-02 0.00 1.856800e+02 numeric
part_00_shape_I3_norm 7.965000e+01 2.429254e+04 3.000000e-02 0.00 7.617375e+06 numeric
part_00_shape_I4_norm 4.000000e-02 1.800000e-01 0.000000e+00 0.00 1.759000e+01 numeric
part_00_shape_I5_norm 2.000000e-02 1.400000e-01 0.000000e+00 0.00 9.560000e+00 numeric
part_00_shape_I6_norm 1.490000e+00 3.493000e+02 4.000000e-02 0.00 1.094147e+05 numeric
part_00_shape_M000 4.144620e+03 6.476230e+03 1.769000e+03 38.00 3.034930e+05 numeric
part_00_shape_CI 3.000000e-02 4.110000e+00 0.000000e+00 -86.47 5.904000e+01 numeric
part_00_shape_E3_E1 2.400000e-01 2.000000e-01 1.700000e-01 0.00 9.900000e-01 numeric
part_00_shape_E2_E1 4.200000e-01 2.400000e-01 3.800000e-01 0.00 1.000000e+00 numeric
part_00_shape_E3_E2 5.500000e-01 2.300000e-01 5.700000e-01 0.01 1.000000e+00 numeric
part_00_shape_sqrt_E1 8.050000e+00 5.950000e+00 5.880000e+00 1.24 1.222400e+02 numeric
part_00_shape_sqrt_E2 4.430000e+00 2.740000e+00 3.500000e+00 0.85 3.072000e+01 numeric
part_00_shape_sqrt_E3 2.940000e+00 1.430000e+00 2.570000e+00 0.64 1.993000e+01 numeric
part_00_density_O3 8.039074e+05 2.699869e+06 4.665748e+04 9.71 1.530951e+08 numeric
part_00_density_O4 1.348644e+12 2.448429e+13 5.434374e+08 24.38 4.953637e+15 numeric
part_00_density_O5 1.873140e+18 1.855912e+20 1.708827e+12 17.32 4.188726e+22 numeric
part_00_density_FL 2.123917e+15 1.828635e+17 8.029246e+09 -3.01 5.509552e+19 numeric
part_00_density_O3_norm 7.500000e-01 1.480000e+00 6.100000e-01 0.04 4.123300e+02 numeric
part_00_density_O4_norm 1.500000e-01 2.300000e-01 9.000000e-02 0.00 3.293000e+01 numeric
part_00_density_O5_norm 1.000000e-02 2.000000e-02 0.000000e+00 0.00 1.320000e+00 numeric
part_00_density_FL_norm 2.800000e-01 1.780000e+00 2.000000e-02 -0.03 1.974900e+02 numeric
part_00_density_I1 9.567676e+08 7.569546e+09 3.412476e+06 42.22 1.319042e+12 numeric
part_00_density_I2 6.133550e+18 9.423662e+20 1.722607e+12 363.10 2.912948e+23 numeric
part_00_density_I3 3.749309e+19 3.126790e+21 4.794211e+12 775.23 9.111782e+23 numeric
part_00_density_I4 1.343260e+15 1.029856e+17 4.872787e+09 -1.01 3.063554e+19 numeric
part_00_density_I5 8.228219e+14 5.104817e+16 1.958822e+09 0.32 1.432889e+19 numeric
part_00_density_I6 1.026506e+16 3.962681e+17 7.060434e+10 182.74 1.085478e+20 numeric
part_00_density_I1_norm 4.800000e+00 9.586100e+02 5.900000e-01 0.00 2.985910e+05 numeric
part_00_density_I2_norm 7.400000e-01 1.814000e+01 5.000000e-02 0.00 4.897260e+03 numeric
part_00_density_I3_norm 9.185392e+05 2.841841e+08 1.500000e-01 0.00 8.911718e+10 numeric
part_00_density_I4_norm 2.100000e-01 1.520000e+00 1.000000e-02 -0.01 1.713100e+02 numeric
part_00_density_I5_norm 1.600000e-01 1.400000e+00 0.000000e+00 0.00 1.538600e+02 numeric
part_00_density_I6_norm 1.302870e+03 3.927637e+05 1.700000e-01 0.00 1.230801e+08 numeric
part_00_density_M000 2.199290e+03 3.171130e+03 9.568900e+02 3.05 5.514218e+04 numeric
part_00_density_CI 4.000000e-02 4.690000e+00 0.000000e+00 -91.33 8.996000e+01 numeric
part_00_density_E3_E1 2.400000e-01 2.000000e-01 1.700000e-01 0.00 9.800000e-01 numeric
part_00_density_E2_E1 4.200000e-01 2.500000e-01 3.800000e-01 0.00 1.000000e+00 numeric
part_00_density_E3_E2 5.500000e-01 2.300000e-01 5.800000e-01 0.01 1.000000e+00 numeric
part_00_density_sqrt_E1 7.730000e+00 5.840000e+00 5.560000e+00 1.24 1.222400e+02 numeric
part_00_density_sqrt_E2 4.210000e+00 2.640000e+00 3.270000e+00 0.85 3.077000e+01 numeric
part_00_density_sqrt_E3 2.780000e+00 1.350000e+00 2.420000e+00 0.64 1.938000e+01 numeric
part_00_shape_Z_7_3 4.108000e+01 3.672000e+01 2.658000e+01 7.22 5.587100e+02 numeric
part_00_shape_Z_0_0 2.622000e+01 1.738000e+01 2.055000e+01 3.01 2.691700e+02 numeric
part_00_shape_Z_7_0 1.744000e+01 1.679000e+01 1.018000e+01 0.87 3.669900e+02 numeric
part_00_shape_Z_7_1 2.824000e+01 2.613000e+01 1.756000e+01 3.66 4.461400e+02 numeric
part_00_shape_Z_3_0 1.509000e+01 1.305000e+01 1.067000e+01 0.65 2.081300e+02 numeric
part_00_shape_Z_5_2 3.503000e+01 2.931000e+01 2.462000e+01 4.66 4.551000e+02 numeric
part_00_shape_Z_6_1 3.179000e+01 2.865000e+01 2.087000e+01 1.81 4.762100e+02 numeric
part_00_shape_Z_3_1 2.444000e+01 1.918000e+01 1.803000e+01 2.55 2.841600e+02 numeric
part_00_shape_Z_6_0 1.487000e+01 1.414000e+01 9.910000e+00 0.02 2.665800e+02 numeric
part_00_shape_Z_2_1 3.837000e+01 2.793000e+01 2.873000e+01 3.12 4.041800e+02 numeric
part_00_shape_Z_6_3 4.667000e+01 4.094000e+01 3.114000e+01 4.11 5.742700e+02 numeric
part_00_shape_Z_2_0 2.817000e+01 2.001000e+01 2.183000e+01 1.68 3.265300e+02 numeric
part_00_shape_Z_6_2 4.215000e+01 3.754000e+01 2.789000e+01 3.01 5.397700e+02 numeric
part_00_shape_Z_5_0 1.835000e+01 1.722000e+01 1.217000e+01 1.17 3.155700e+02 numeric
part_00_shape_Z_5_1 2.896000e+01 2.480000e+01 2.037000e+01 3.88 4.074900e+02 numeric
part_00_shape_Z_4_2 4.390000e+01 3.597000e+01 3.137000e+01 3.61 5.018300e+02 numeric
part_00_shape_Z_1_0 1.430000e+00 2.100000e-01 1.400000e+00 0.76 2.400000e+00 numeric
part_00_shape_Z_4_1 3.777000e+01 3.144000e+01 2.696000e+01 2.68 4.656000e+02 numeric
part_00_shape_Z_7_2 3.652000e+01 3.330000e+01 2.328000e+01 6.34 5.303200e+02 numeric
part_00_shape_Z_4_0 2.053000e+01 1.769000e+01 1.488000e+01 0.03 3.130900e+02 numeric
part_00_density_Z_7_3 3.040000e+01 2.770000e+01 1.939000e+01 2.89 2.006900e+02 numeric
part_00_density_Z_0_0 1.907000e+01 1.271000e+01 1.511000e+01 0.85 1.147400e+02 numeric
part_00_density_Z_7_0 1.493000e+01 1.379000e+01 8.510000e+00 0.98 1.203200e+02 numeric
part_00_density_Z_7_1 2.238000e+01 2.054000e+01 1.387000e+01 2.88 1.505300e+02 numeric
part_00_density_Z_3_0 1.132000e+01 9.760000e+00 7.760000e+00 0.61 8.854000e+01 numeric
part_00_density_Z_5_2 2.558000e+01 2.175000e+01 1.767000e+01 2.15 1.711700e+02 numeric
part_00_density_Z_6_1 2.477000e+01 2.262000e+01 1.711000e+01 0.51 1.595100e+02 numeric
part_00_density_Z_3_1 1.732000e+01 1.397000e+01 1.240000e+01 1.44 1.208600e+02 numeric
part_00_density_Z_6_0 1.258000e+01 1.261000e+01 8.040000e+00 0.01 1.131700e+02 numeric
part_00_density_Z_2_1 2.806000e+01 2.017000e+01 2.141000e+01 0.91 1.632100e+02 numeric
part_00_density_Z_6_3 3.434000e+01 3.089000e+01 2.349000e+01 1.16 2.314200e+02 numeric
part_00_density_Z_2_0 2.158000e+01 1.494000e+01 1.689000e+01 0.51 1.352800e+02 numeric
part_00_density_Z_6_2 3.160000e+01 2.865000e+01 2.161000e+01 0.85 1.976900e+02 numeric
part_00_density_Z_5_0 1.495000e+01 1.351000e+01 9.720000e+00 0.87 1.122900e+02 numeric
part_00_density_Z_5_1 2.191000e+01 1.874000e+01 1.512000e+01 2.14 1.507000e+02 numeric
part_00_density_Z_4_2 3.233000e+01 2.633000e+01 2.343000e+01 1.01 2.072000e+02 numeric
part_00_density_Z_1_0 1.420000e+00 2.200000e-01 1.390000e+00 0.68 2.400000e+00 numeric
part_00_density_Z_4_1 2.857000e+01 2.323000e+01 2.086000e+01 0.76 1.763800e+02 numeric
part_00_density_Z_7_2 2.769000e+01 2.543000e+01 1.746000e+01 2.89 1.855800e+02 numeric
part_00_density_Z_4_0 1.702000e+01 1.409000e+01 1.270000e+01 0.01 1.172400e+02 numeric
part_01_shape_segments_count 2.846100e+02 9.739000e+02 1.500000e+01 0.00 6.920200e+04 numeric
part_01_density_segments_count 2.846100e+02 9.739000e+02 1.500000e+01 0.00 6.920200e+04 numeric
part_01_volume 2.547000e+01 4.207000e+01 1.025000e+01 0.00 1.996250e+03 numeric
part_01_electrons 1.513000e+01 2.311000e+01 6.080000e+00 0.00 3.957000e+02 numeric
part_01_mean 6.500000e-01 4.200000e-01 5.600000e-01 0.00 8.380000e+00 numeric
part_01_std 2.000000e-01 3.000000e-01 1.100000e-01 0.00 7.040000e+00 numeric
part_01_max 1.340000e+00 1.580000e+00 8.800000e-01 0.00 4.344000e+01 numeric
part_01_max_over_std 9.660000e+00 7.620000e+00 7.160000e+00 0.00 1.732500e+02 numeric
part_01_skewness 2.000000e-01 3.400000e-01 9.000000e-02 0.00 9.740000e+00 numeric
part_01_parts 1.270000e+00 7.000000e-01 1.000000e+00 0.00 2.400000e+01 numeric
part_01_shape_O3 1.285875e+06 5.895761e+06 5.895573e+04 75.16 8.067921e+08 numeric
part_01_shape_O4 6.664815e+12 5.470025e+14 8.512691e+08 1818.62 1.394603e+17 numeric
part_01_shape_O5 1.205822e+20 2.321036e+22 3.371030e+12 13532.12 5.890566e+24 numeric
part_01_shape_FL 2.578948e+16 5.245823e+18 1.015322e+10 0.00 1.624363e+21 numeric
part_01_shape_O3_norm 5.300000e-01 4.200000e-01 3.700000e-01 0.23 3.371000e+01 numeric
part_01_shape_O4_norm 7.000000e-02 1.200000e-01 3.000000e-02 0.02 1.175000e+01 numeric
part_01_shape_O5_norm 0.000000e+00 1.000000e-02 0.000000e+00 0.00 1.230000e+00 numeric
part_01_shape_FL_norm 1.300000e-01 1.190000e+00 0.000000e+00 0.00 1.905600e+02 numeric
part_01_shape_I1 1.680991e+09 2.954402e+10 3.880954e+06 210.50 6.873800e+12 numeric
part_01_shape_I2 1.064721e+20 2.578801e+22 2.206661e+12 10919.68 8.031201e+24 numeric
part_01_shape_I3 5.570987e+20 8.974707e+22 6.123784e+12 10533.69 2.454099e+25 numeric
part_01_shape_I4 1.607613e+16 2.993065e+18 5.681467e+09 0.00 9.148749e+20 numeric
part_01_shape_I5 9.600569e+15 1.529090e+18 1.565131e+09 0.00 4.418825e+20 numeric
part_01_shape_I6 8.128284e+16 1.008962e+19 1.035663e+11 5635.88 2.955845e+21 numeric
part_01_shape_I1_norm 7.400000e-01 6.660000e+00 2.200000e-01 0.06 2.026680e+03 numeric
part_01_shape_I2_norm 2.000000e-01 4.340000e+00 1.000000e-02 0.00 1.041520e+03 numeric
part_01_shape_I3_norm 4.463000e+01 1.308951e+04 2.000000e-02 0.00 4.105097e+06 numeric
part_01_shape_I4_norm 1.000000e-01 1.180000e+00 0.000000e+00 0.00 1.969600e+02 numeric
part_01_shape_I5_norm 8.000000e-02 1.220000e+00 0.000000e+00 0.00 2.012300e+02 numeric
part_01_shape_I6_norm 1.320000e+00 2.178300e+02 4.000000e-02 0.00 6.830019e+04 numeric
part_01_shape_M000 3.211340e+03 5.250800e+03 1.316000e+03 32.00 2.495310e+05 numeric
part_01_shape_CI 4.000000e-02 4.510000e+00 0.000000e+00 -96.72 6.850000e+01 numeric
part_01_shape_E3_E1 2.500000e-01 2.100000e-01 1.800000e-01 0.00 9.800000e-01 numeric
part_01_shape_E2_E1 4.300000e-01 2.500000e-01 4.000000e-01 0.00 1.000000e+00 numeric
part_01_shape_E3_E2 5.600000e-01 2.300000e-01 5.900000e-01 0.01 1.000000e+00 numeric
part_01_shape_sqrt_E1 7.460000e+00 6.000000e+00 5.270000e+00 0.98 1.212400e+02 numeric
part_01_shape_sqrt_E2 4.030000e+00 2.740000e+00 3.150000e+00 0.53 2.979000e+01 numeric
part_01_shape_sqrt_E3 2.660000e+00 1.420000e+00 2.340000e+00 0.37 1.906000e+01 numeric
part_01_density_O3 6.785104e+05 2.326233e+06 3.276166e+04 9.11 1.052066e+08 numeric
part_01_density_O4 9.759291e+11 1.512961e+13 2.580682e+08 25.84 3.091615e+15 numeric
part_01_density_O5 1.003302e+18 9.431303e+19 5.587687e+11 18.15 2.687360e+22 numeric
part_01_density_FL 1.383468e+15 8.269370e+16 2.736651e+09 -5.57 2.393784e+19 numeric
part_01_density_O3_norm 7.700000e-01 1.190000e+00 5.700000e-01 0.04 3.097900e+02 numeric
part_01_density_O4_norm 1.600000e-01 3.000000e-01 8.000000e-02 0.00 3.095000e+01 numeric
part_01_density_O5_norm 1.000000e-02 3.000000e-02 0.000000e+00 0.00 3.420000e+00 numeric
part_01_density_FL_norm 6.700000e-01 1.259000e+01 1.000000e-02 0.00 3.090780e+03 numeric
part_01_density_I1 7.973432e+08 5.791724e+09 1.993269e+06 27.93 8.349368e+11 numeric
part_01_density_I2 3.350737e+18 3.870776e+20 5.880947e+11 173.64 1.167021e+23 numeric
part_01_density_I3 2.268220e+19 1.311066e+21 1.556676e+12 215.81 3.593334e+23 numeric
part_01_density_I4 8.948907e+14 4.539024e+16 1.643640e+09 -0.71 1.265893e+19 numeric
part_01_density_I5 5.691722e+14 2.230580e+16 5.962343e+08 0.02 5.139649e+18 numeric
part_01_density_I6 7.241671e+15 1.987943e+17 2.903723e+10 95.17 4.672310e+19 numeric
part_01_density_I1_norm 3.500000e+00 5.456800e+02 5.000000e-01 0.00 1.711294e+05 numeric
part_01_density_I2_norm 1.520000e+00 3.445000e+01 4.000000e-02 0.00 8.160000e+03 numeric
part_01_density_I3_norm 3.002167e+05 9.332569e+07 1.100000e-01 0.00 2.926873e+10 numeric
part_01_density_I4_norm 5.900000e-01 1.289000e+01 1.000000e-02 0.00 3.197240e+03 numeric
part_01_density_I5_norm 5.300000e-01 1.323000e+01 0.000000e+00 0.00 3.268210e+03 numeric
part_01_density_I6_norm 5.463800e+02 1.689748e+05 1.300000e-01 0.00 5.299373e+07 numeric
part_01_density_M000 1.907350e+03 2.883050e+03 7.844400e+02 3.12 4.946192e+04 numeric
part_01_density_CI 4.000000e-02 5.080000e+00 0.000000e+00 -102.45 7.690000e+01 numeric
part_01_density_E3_E1 2.600000e-01 2.100000e-01 1.800000e-01 0.00 9.800000e-01 numeric
part_01_density_E2_E1 4.300000e-01 2.500000e-01 4.000000e-01 0.00 1.000000e+00 numeric
part_01_density_E3_E2 5.700000e-01 2.400000e-01 5.900000e-01 0.01 1.000000e+00 numeric
part_01_density_sqrt_E1 7.200000e+00 5.880000e+00 4.950000e+00 0.97 1.212400e+02 numeric
part_01_density_sqrt_E2 3.850000e+00 2.650000e+00 2.960000e+00 0.53 2.979000e+01 numeric
part_01_density_sqrt_E3 2.530000e+00 1.350000e+00 2.220000e+00 0.37 1.869000e+01 numeric
part_01_shape_Z_7_3 3.588000e+01 3.398000e+01 2.194000e+01 4.61 4.650800e+02 numeric
part_01_shape_Z_0_0 2.246000e+01 1.612000e+01 1.772000e+01 2.76 2.440700e+02 numeric
part_01_shape_Z_7_0 1.604000e+01 1.547000e+01 8.680000e+00 1.20 2.915600e+02 numeric
part_01_shape_Z_7_1 2.502000e+01 2.415000e+01 1.435000e+01 3.42 3.648400e+02 numeric
part_01_shape_Z_3_0 1.343000e+01 1.224000e+01 8.960000e+00 0.90 1.877700e+02 numeric
part_01_shape_Z_5_2 3.028000e+01 2.716000e+01 2.044000e+01 3.10 3.791800e+02 numeric
part_01_shape_Z_6_1 2.740000e+01 2.684000e+01 1.695000e+01 1.41 3.717000e+02 numeric
part_01_shape_Z_3_1 2.129000e+01 1.788000e+01 1.542000e+01 2.42 2.414300e+02 numeric
part_01_shape_Z_6_0 1.313000e+01 1.360000e+01 8.170000e+00 0.02 2.033400e+02 numeric
part_01_shape_Z_2_1 3.271000e+01 2.592000e+01 2.420000e+01 1.71 3.463800e+02 numeric
part_01_shape_Z_6_3 4.022000e+01 3.817000e+01 2.567000e+01 3.40 4.742100e+02 numeric
part_01_shape_Z_2_0 2.383000e+01 1.867000e+01 1.812000e+01 0.16 2.846000e+02 numeric
part_01_shape_Z_6_2 3.614000e+01 3.495000e+01 2.265000e+01 2.49 4.354500e+02 numeric
part_01_shape_Z_5_0 1.644000e+01 1.595000e+01 9.730000e+00 1.07 2.616100e+02 numeric
part_01_shape_Z_5_1 2.497000e+01 2.288000e+01 1.661000e+01 2.41 3.335900e+02 numeric
part_01_shape_Z_4_2 3.741000e+01 3.346000e+01 2.569000e+01 2.52 4.144300e+02 numeric
part_01_shape_Z_1_0 1.540000e+00 3.100000e-01 1.500000e+00 0.71 4.280000e+00 numeric
part_01_shape_Z_4_1 3.193000e+01 2.924000e+01 2.177000e+01 1.34 3.724600e+02 numeric
part_01_shape_Z_7_2 3.181000e+01 3.069000e+01 1.887000e+01 4.02 4.419500e+02 numeric
part_01_shape_Z_4_0 1.742000e+01 1.671000e+01 1.169000e+01 0.01 2.356900e+02 numeric
part_01_density_Z_7_3 2.807000e+01 2.673000e+01 1.647000e+01 2.69 1.947200e+02 numeric
part_01_density_Z_0_0 1.727000e+01 1.247000e+01 1.368000e+01 0.86 1.086700e+02 numeric
part_01_density_Z_7_0 1.431000e+01 1.319000e+01 7.810000e+00 1.27 1.090600e+02 numeric
part_01_density_Z_7_1 2.084000e+01 1.975000e+01 1.154000e+01 2.63 1.459800e+02 numeric
part_01_density_Z_3_0 1.069000e+01 9.520000e+00 6.880000e+00 0.64 8.632000e+01 numeric
part_01_density_Z_5_2 2.346000e+01 2.109000e+01 1.527000e+01 2.07 1.634900e+02 numeric
part_01_density_Z_6_1 2.213000e+01 2.211000e+01 1.389000e+01 0.59 1.562900e+02 numeric
part_01_density_Z_3_1 1.607000e+01 1.362000e+01 1.112000e+01 1.39 1.174500e+02 numeric
part_01_density_Z_6_0 1.138000e+01 1.248000e+01 6.230000e+00 0.01 1.115900e+02 numeric
part_01_density_Z_2_1 2.529000e+01 1.975000e+01 1.909000e+01 0.80 1.553600e+02 numeric
part_01_density_Z_6_3 3.098000e+01 3.012000e+01 1.978000e+01 1.26 2.229200e+02 numeric
part_01_density_Z_2_0 1.928000e+01 1.474000e+01 1.485000e+01 0.06 1.281300e+02 numeric
part_01_density_Z_6_2 2.829000e+01 2.788000e+01 1.795000e+01 0.88 1.895800e+02 numeric
part_01_density_Z_5_0 1.404000e+01 1.303000e+01 8.240000e+00 0.98 1.056100e+02 numeric
part_01_density_Z_5_1 2.001000e+01 1.809000e+01 1.283000e+01 2.04 1.432400e+02 numeric
part_01_density_Z_4_2 2.892000e+01 2.578000e+01 2.030000e+01 0.97 1.999300e+02 numeric
part_01_density_Z_1_0 1.540000e+00 3.200000e-01 1.490000e+00 0.64 4.290000e+00 numeric
part_01_density_Z_4_1 2.530000e+01 2.278000e+01 1.782000e+01 0.48 1.697600e+02 numeric
part_01_density_Z_7_2 2.547000e+01 2.444000e+01 1.462000e+01 2.66 1.800600e+02 numeric
part_01_density_Z_4_0 1.495000e+01 1.404000e+01 1.051000e+01 0.01 1.166400e+02 numeric
part_02_shape_segments_count 2.379700e+02 8.491800e+02 9.000000e+00 0.00 4.365300e+04 numeric
part_02_density_segments_count 2.379700e+02 8.491800e+02 9.000000e+00 0.00 4.365300e+04 numeric
part_02_volume 1.963000e+01 3.454000e+01 7.260000e+00 0.00 1.632540e+03 numeric
part_02_electrons 1.291000e+01 2.088000e+01 4.690000e+00 0.00 3.511900e+02 numeric
part_02_mean 6.700000e-01 4.700000e-01 6.000000e-01 0.00 8.480000e+00 numeric
part_02_std 1.800000e-01 3.000000e-01 9.000000e-02 0.00 7.080000e+00 numeric
part_02_max 1.310000e+00 1.600000e+00 8.800000e-01 0.00 4.344000e+01 numeric
part_02_max_over_std 9.430000e+00 7.850000e+00 7.160000e+00 0.00 1.732500e+02 numeric
part_02_skewness 1.900000e-01 3.400000e-01 8.000000e-02 0.00 9.950000e+00 numeric
part_02_parts 1.300000e+00 9.300000e-01 1.000000e+00 0.00 2.300000e+01 numeric
part_02_shape_O3 1.023403e+06 4.303259e+06 4.984300e+04 74.94 4.494159e+08 numeric
part_02_shape_O4 3.618620e+12 2.266623e+14 6.009194e+08 1808.75 5.588028e+16 numeric
part_02_shape_O5 3.695066e+19 7.001250e+21 2.022654e+12 13311.03 2.033831e+24 numeric
part_02_shape_FL 9.563642e+15 1.371027e+18 6.549504e+09 0.00 4.149300e+20 numeric
part_02_shape_O3_norm 5.800000e-01 5.200000e-01 3.700000e-01 0.23 1.910000e+01 numeric
part_02_shape_O4_norm 9.000000e-02 1.700000e-01 3.000000e-02 0.02 1.709000e+01 numeric
part_02_shape_O5_norm 0.000000e+00 2.000000e-02 0.000000e+00 0.00 6.420000e+00 numeric
part_02_shape_FL_norm 3.400000e-01 6.500000e+00 0.000000e+00 0.00 1.156900e+03 numeric
part_02_shape_I1 1.278608e+09 1.597263e+10 3.141227e+06 207.72 3.405768e+12 numeric
part_02_shape_I2 3.412012e+19 6.420550e+21 1.394457e+12 10931.95 1.940322e+24 numeric
part_02_shape_I3 1.623284e+20 2.125026e+22 3.963852e+12 9864.81 5.837646e+24 numeric
part_02_shape_I4 6.127652e+15 7.712413e+17 3.698479e+09 0.00 2.208664e+20 numeric
part_02_shape_I5 3.836992e+15 4.176162e+17 1.019123e+09 0.00 9.149077e+19 numeric
part_02_shape_I6 3.643785e+16 3.112328e+18 7.058095e+10 5421.64 7.960270e+20 numeric
part_02_shape_I1_norm 1.020000e+00 3.790000e+00 2.300000e-01 0.06 4.512700e+02 numeric
part_02_shape_I2_norm 6.300000e-01 2.098000e+01 1.000000e-02 0.00 3.532660e+03 numeric
part_02_shape_I3_norm 1.478000e+01 8.441900e+02 3.000000e-02 0.00 1.947007e+05 numeric
part_02_shape_I4_norm 3.000000e-01 6.400000e+00 0.000000e+00 0.00 1.156130e+03 numeric
part_02_shape_I5_norm 2.700000e-01 6.380000e+00 0.000000e+00 0.00 1.155620e+03 numeric
part_02_shape_I6_norm 1.550000e+00 3.431000e+01 4.000000e-02 0.00 5.890920e+03 numeric
part_02_shape_M000 2.635150e+03 4.265030e+03 1.177000e+03 32.00 2.040670e+05 numeric
part_02_shape_CI 2.000000e-02 4.680000e+00 1.000000e-02 -98.04 1.086200e+02 numeric
part_02_shape_E3_E1 2.700000e-01 2.100000e-01 2.300000e-01 0.00 9.900000e-01 numeric
part_02_shape_E2_E1 4.400000e-01 2.500000e-01 4.400000e-01 0.00 1.000000e+00 numeric
part_02_shape_E3_E2 5.800000e-01 2.300000e-01 5.900000e-01 0.01 1.000000e+00 numeric
part_02_shape_sqrt_E1 7.120000e+00 5.910000e+00 5.110000e+00 0.95 5.649000e+01 numeric
part_02_shape_sqrt_E2 3.790000e+00 2.660000e+00 3.060000e+00 0.58 2.892000e+01 numeric
part_02_shape_sqrt_E3 2.490000e+00 1.360000e+00 2.290000e+00 0.43 1.822000e+01 numeric
part_02_density_O3 5.966843e+05 1.985734e+06 3.104260e+04 13.11 9.109740e+07 numeric
part_02_density_O4 7.373334e+11 1.010164e+13 2.346613e+08 44.30 2.288257e+15 numeric
part_02_density_O5 5.971803e+17 5.509864e+19 4.767582e+11 41.57 1.682776e+22 numeric
part_02_density_FL 9.384562e+14 3.061831e+16 2.203128e+09 -5.97 7.209550e+18 numeric
part_02_density_O3_norm 7.800000e-01 8.100000e-01 5.500000e-01 0.04 3.879000e+01 numeric
part_02_density_O4_norm 1.700000e-01 5.300000e-01 7.000000e-02 0.00 1.155500e+02 numeric
part_02_density_O5_norm 1.000000e-02 3.600000e-01 0.000000e+00 0.00 1.130500e+02 numeric
part_02_density_FL_norm 1.660000e+00 4.495000e+01 1.000000e-02 0.00 7.790880e+03 numeric
part_02_density_I1 6.934937e+08 4.528853e+09 1.808901e+06 39.35 4.671895e+11 numeric
part_02_density_I2 1.936164e+18 1.355701e+20 4.741799e+11 359.65 3.570996e+22 numeric
part_02_density_I3 1.538042e+19 5.575973e+20 1.249398e+12 415.11 1.112850e+23 numeric
part_02_density_I4 6.337055e+14 1.845494e+16 1.333759e+09 -1.55 3.944832e+18 numeric
part_02_density_I5 4.305383e+14 1.171286e+16 4.798817e+08 0.00 2.050033e+18 numeric
part_02_density_I6 5.395791e+15 1.062134e+17 2.509640e+10 193.32 1.566874e+19 numeric
part_02_density_I1_norm 2.230000e+00 1.264000e+01 4.700000e-01 0.00 1.857550e+03 numeric
part_02_density_I2_norm 6.210000e+00 5.150700e+02 3.000000e-02 0.00 1.496948e+05 numeric
part_02_density_I3_norm 1.618400e+02 1.239533e+04 1.000000e-01 0.00 3.300967e+06 numeric
part_02_density_I4_norm 1.550000e+00 4.289000e+01 1.000000e-02 0.00 7.790290e+03 numeric
part_02_density_I5_norm 1.470000e+00 4.238000e+01 0.000000e+00 0.00 7.789890e+03 numeric
part_02_density_I6_norm 7.050000e+00 2.288400e+02 1.200000e-01 0.00 4.926182e+04 numeric
part_02_density_M000 1.733330e+03 2.572700e+03 7.827900e+02 4.13 4.389833e+04 numeric
part_02_density_CI 2.000000e-02 5.220000e+00 1.000000e-02 -107.87 1.143800e+02 numeric
part_02_density_E3_E1 2.700000e-01 2.200000e-01 2.300000e-01 0.00 9.900000e-01 numeric
part_02_density_E2_E1 4.400000e-01 2.500000e-01 4.400000e-01 0.00 1.000000e+00 numeric
part_02_density_E3_E2 5.800000e-01 2.300000e-01 5.900000e-01 0.02 1.000000e+00 numeric
part_02_density_sqrt_E1 6.880000e+00 5.790000e+00 4.800000e+00 0.93 5.708000e+01 numeric
part_02_density_sqrt_E2 3.630000e+00 2.570000e+00 2.870000e+00 0.58 2.860000e+01 numeric
part_02_density_sqrt_E3 2.380000e+00 1.290000e+00 2.180000e+00 0.43 1.768000e+01 numeric
part_02_shape_Z_7_3 3.271000e+01 3.071000e+01 2.042000e+01 6.93 4.045800e+02 numeric
part_02_shape_Z_0_0 2.007000e+01 1.452000e+01 1.676000e+01 2.76 2.207200e+02 numeric
part_02_shape_Z_7_0 1.527000e+01 1.400000e+01 8.980000e+00 1.20 2.288400e+02 numeric
part_02_shape_Z_7_1 2.314000e+01 2.183000e+01 1.347000e+01 3.76 3.071900e+02 numeric
part_02_shape_Z_3_0 1.242000e+01 1.120000e+01 8.470000e+00 0.66 1.722100e+02 numeric
part_02_shape_Z_5_2 2.735000e+01 2.455000e+01 1.898000e+01 5.23 3.441100e+02 numeric
part_02_shape_Z_6_1 2.446000e+01 2.451000e+01 1.572000e+01 1.24 3.139600e+02 numeric
part_02_shape_Z_3_1 1.932000e+01 1.624000e+01 1.452000e+01 2.73 2.226900e+02 numeric
part_02_shape_Z_6_0 1.192000e+01 1.270000e+01 7.670000e+00 0.00 1.772800e+02 numeric
part_02_shape_Z_2_1 2.902000e+01 2.343000e+01 2.245000e+01 1.59 3.159100e+02 numeric
part_02_shape_Z_6_3 3.598000e+01 3.473000e+01 2.393000e+01 3.28 4.341400e+02 numeric
part_02_shape_Z_2_0 2.100000e+01 1.692000e+01 1.672000e+01 0.15 2.567500e+02 numeric
part_02_shape_Z_6_2 3.218000e+01 3.176000e+01 2.095000e+01 2.36 3.929800e+02 numeric
part_02_shape_Z_5_0 1.531000e+01 1.448000e+01 9.060000e+00 1.06 2.273200e+02 numeric
part_02_shape_Z_5_1 2.256000e+01 2.060000e+01 1.531000e+01 2.86 3.003900e+02 numeric
part_02_shape_Z_4_2 3.314000e+01 3.037000e+01 2.358000e+01 2.23 3.755500e+02 numeric
part_02_shape_Z_1_0 1.660000e+00 4.000000e-01 1.630000e+00 0.67 4.570000e+00 numeric
part_02_shape_Z_4_1 2.809000e+01 2.650000e+01 1.971000e+01 1.16 3.450400e+02 numeric
part_02_shape_Z_7_2 2.897000e+01 2.762000e+01 1.744000e+01 5.37 3.709200e+02 numeric
part_02_shape_Z_4_0 1.539000e+01 1.532000e+01 1.056000e+01 0.01 2.127600e+02 numeric
part_02_density_Z_7_3 2.691000e+01 2.515000e+01 1.610000e+01 4.99 1.891000e+02 numeric
part_02_density_Z_0_0 1.628000e+01 1.177000e+01 1.367000e+01 0.99 1.023700e+02 numeric
part_02_density_Z_7_0 1.415000e+01 1.243000e+01 8.540000e+00 1.36 1.065300e+02 numeric
part_02_density_Z_7_1 2.015000e+01 1.856000e+01 1.138000e+01 3.71 1.395400e+02 numeric
part_02_density_Z_3_0 1.041000e+01 9.070000e+00 6.840000e+00 0.65 8.338000e+01 numeric
part_02_density_Z_5_2 2.234000e+01 1.987000e+01 1.496000e+01 3.97 1.560700e+02 numeric
part_02_density_Z_6_1 2.057000e+01 2.102000e+01 1.309000e+01 0.46 1.519500e+02 numeric
part_02_density_Z_3_1 1.542000e+01 1.290000e+01 1.112000e+01 1.98 1.130300e+02 numeric
part_02_density_Z_6_0 1.070000e+01 1.209000e+01 5.860000e+00 0.02 1.097800e+02 numeric
part_02_density_Z_2_1 2.365000e+01 1.866000e+01 1.879000e+01 0.96 1.494000e+02 numeric
part_02_density_Z_6_3 2.900000e+01 2.854000e+01 1.909000e+01 1.18 2.139900e+02 numeric
part_02_density_Z_2_0 1.792000e+01 1.400000e+01 1.451000e+01 0.05 1.205600e+02 numeric
part_02_density_Z_6_2 2.632000e+01 2.638000e+01 1.710000e+01 0.87 1.837000e+02 numeric
part_02_density_Z_5_0 1.365000e+01 1.231000e+01 8.150000e+00 1.09 1.030400e+02 numeric
part_02_density_Z_5_1 1.903000e+01 1.699000e+01 1.247000e+01 1.98 1.364000e+02 numeric
part_02_density_Z_4_2 2.684000e+01 2.447000e+01 1.959000e+01 0.99 1.920300e+02 numeric
part_02_density_Z_1_0 1.650000e+00 4.000000e-01 1.630000e+00 0.61 4.580000e+00 numeric
part_02_density_Z_4_1 2.327000e+01 2.162000e+01 1.703000e+01 0.60 1.621900e+02 numeric
part_02_density_Z_7_2 2.436000e+01 2.290000e+01 1.416000e+01 4.36 1.751700e+02 numeric
part_02_density_Z_4_0 1.371000e+01 1.353000e+01 9.860000e+00 0.01 1.159500e+02 numeric
resolution 2.150000e+00 5.300000e-01 2.080000e+00 0.61 5.710000e+00 numeric
FoFc_mean 0.000000e+00 0.000000e+00 0.000000e+00 0.00 0.000000e+00 numeric
FoFc_std 1.300000e-01 5.000000e-02 1.200000e-01 0.01 9.400000e-01 numeric
FoFc_square_std 2.000000e-02 2.000000e-02 2.000000e-02 0.00 8.900000e-01 numeric
FoFc_min -7.000000e-01 2.900000e-01 -6.700000e-01 -5.04 -4.000000e-02 numeric
FoFc_max 2.600000e+00 2.550000e+00 1.820000e+00 0.04 4.335000e+01 numeric

8 Ograniczenie zbioru do 50 najpopularniejszych wartości res_name.

9 50 najpopularniejszych wartosci res_name oraz ilość przykładów.

res_name Przyklady
SO4 9520
GOL 6938
EDO 5417
NAG 4760
CL 4119
CA 3608
ZN 3147
MG 2613
HEM 2018
PO4 1780
IOD 1193
ACT 1171
DMS 1097
PEG 840
NAD 796
FAD 791
MN 779
K 742
NAP 659
ADP 583
CD 541
MAN 535
PG4 534
UNX 509
MES 486
MLY 471
MPD 448
BR 405
COA 405
1PE 399
CU 385
FMN 372
NDP 371
ACY 370
FMT 355
NI 350
ATP 336
EPE 328
C8E 325
CLA 320
ANP 306
GSH 306
NO3 304
CYC 295
CIT 289
SF4 286
PGE 278
BME 272
SEP 270
FE2 267

10 Wykresy rozkladow:

10.1 Liczby atomow.

10.2 Liczby elektronow.

11 Korelacje miedzy zmiennymi.

Tabela przedstawia korelację pomiędzy poszczególnymi liczbowymi kolumnami zbioru przy pomocy funkcji cor(), wyświetlając wyniki gdzie wartość bezwzględna jest większa od 0,6.

11.1 Tabela korelacji.

11.2 Mapa ciepła dla wybranych kolumn.

12 10 klas z najwieksza niezgodnoscia:

Niezgodność obliczono jako liczbę wierszy danej klasy, w których wartość lokalna (kolumna local_res_*) różni się od wartości słownikowej (kolumna dict_*).

12.1 Liczby atomów.

res_name Niezgodnosc
NAG 4685
MAN 490
MLY 469
SEP 256
1PE 226
C8E 161
CLA 133
PG4 123
NAP 80
NAD 60

12.2 Liczby elektronow.

res_name Niezgodnosc
NAG 4685
MAN 490
MLY 469
SEP 256
1PE 226
C8E 161
CLA 133
PG4 123
NAP 80
NAD 60

13 Rozklad wartosci kolumn part_01.

14 Regresja liniowa:

14.1 Dla liczby atomów.

set.seed(123)

# Candidate predictors: every variable paired with local_res_atom_non_h_count
# in cor_pdb, except the dictionary-derived columns.
# `%in%` is required here: `X1 != c(a, b)` recycles the right-hand vector and
# compares the rows against a and b alternately, so it excludes only some of
# the matching rows.
dict_cols_at <- c("dict_atom_non_h_count", "dict_atom_non_h_electron_sum")
reg_at <- cor_pdb %>%
  filter(X2 == "local_res_atom_non_h_count", !(X1 %in% dict_cols_at))
reg_at_names <- unique(as.character(reg_at[[1]]))

# Modelling table: the selected predictors plus the response column.
regresja_at <- pdb_clear_last %>%
  select(all_of(union(reg_at_names, "local_res_atom_non_h_count")))

# 70/30 train/test split of the modelling table. The original code split
# pdb_clear_last instead, so regresja_at was never used and the model was
# trained on every column, including dict_atom_non_h_count.
idx_at <- createDataPartition(regresja_at$local_res_atom_non_h_count,
                              p = 0.7, list = FALSE)
training_at <- regresja_at[idx_at, ]
testing_at <- regresja_at[-idx_at, ]

# 2-fold cross-validation repeated 5 times.
control <- trainControl(method = "repeatedcv", number = 2, repeats = 5)

fit_at <- train(local_res_atom_non_h_count ~ ., data = training_at,
                method = "glm", metric = "RMSE", trControl = control)
predAt <- predict(fit_at, newdata = testing_at)
postResample(predAt, testing_at$local_res_atom_non_h_count)
# NOTE(review): the metrics below were recorded before the fixes above (model
# trained on all columns of pdb_clear_last); re-knit the report to refresh them.
##       RMSE   Rsquared        MAE 
## 0.17738558 0.99980943 0.02157673

Wartość RMSE zbliżona do 0. Dla miary R^2 również uzyskano zadowalający wynik zbliżony do 1.

14.2 Dla liczby elektronów.

set.seed(123)

# Candidate predictors: every variable paired with
# local_res_atom_non_h_electron_sum in cor_pdb, except the dictionary-derived
# columns and the response itself.
# `%in%` is required here: `X1 != c(a, b, c)` recycles the right-hand vector
# and therefore excludes only some of the matching rows.
excluded_el <- c("dict_atom_non_h_count", "dict_atom_non_h_electron_sum",
                 "local_res_atom_non_h_electron_sum")
reg_el <- cor_pdb %>%
  filter(X2 == "local_res_atom_non_h_electron_sum", !(X1 %in% excluded_el))
# Take the names from reg_el; the original read them from reg_at (the
# atom-count predictors), which looks like a copy-paste slip.
reg_el_names <- unique(as.character(reg_el[[1]]))

# Modelling table: the selected predictors plus the response column
# (the original selected local_res_atom_non_h_count here instead).
regresja_el <- pdb_clear_last %>%
  select(all_of(union(reg_el_names, "local_res_atom_non_h_electron_sum")))

# 70/30 train/test split of the modelling table.
idx_el <- createDataPartition(regresja_el$local_res_atom_non_h_electron_sum,
                              p = 0.7, list = FALSE)
training_el <- regresja_el[idx_el, ]
testing_el <- regresja_el[-idx_el, ]

# 2-fold cross-validation repeated 5 times.
control <- trainControl(method = "repeatedcv", number = 2, repeats = 5)

# Fit on the training rows only. The original fitted on the whole of
# regresja_el, so the test rows had already been seen during training.
fit_el <- train(local_res_atom_non_h_electron_sum ~ ., data = training_el,
                method = "glm", metric = "RMSE", trControl = control)
predEl <- predict(fit_el, newdata = testing_el)
postResample(predEl, testing_el$local_res_atom_non_h_electron_sum)
# NOTE(review): the metrics below were recorded before the fixes above;
# re-knit the report to refresh them.
##       RMSE   Rsquared        MAE 
## 12.9903736  0.9776817  8.8423815

Wartość miary RMSE niezadowalająca, nie udało się uzyskać dobrego wyniku dla liczby elektronów.

15 Klasyfikator Random Forest dla wartości res_name.

15.1 Dla ntree = 5.

# Remove the four atom/electron count columns so that they are not available
# to the classifier as predictors.
pdb_clear_50_rf <- pdb_clear_50 %>%
  select(-dict_atom_non_h_electron_sum, -dict_atom_non_h_count,
         -local_res_atom_non_h_electron_sum, -local_res_atom_non_h_count)

# 70/30 train/test split based on res_name.
idx_kl <- createDataPartition(pdb_clear_50_rf$res_name,
                              p = 0.7, list = FALSE)

training_kl <- pdb_clear_50_rf[idx_kl, ]

testing_kl <- pdb_clear_50_rf[-idx_kl, ]

# 2-fold cross-validation repeated 5 times.
control <- trainControl(method = "repeatedcv", number = 2, repeats = 5)

set.seed(123)

fit_kl <- train(as.factor(res_name) ~ .,
                data = training_kl,
                method = "rf",
                trControl = control,
                ntree = 5)

rfRes <- predict(fit_kl, newdata = testing_kl)

# Take the true labels by column name rather than by position, and give the
# reference factor the same levels as the predictions (plus any label that
# occurs only in the test set), so confusionMatrix() does not depend on column
# order or on every class being present in the test sample.
actual_kl <- as.character(testing_kl[["res_name"]])
cm_1 <- confusionMatrix(data = rfRes,
                        reference = factor(actual_kl,
                                           levels = union(levels(rfRes),
                                                          actual_kl)))

cm_1$overall
##       Accuracy          Kappa  AccuracyLower  AccuracyUpper   AccuracyNull 
##      0.3853812      0.3367881      0.3784681      0.3923292      0.1496463 
## AccuracyPValue  McnemarPValue 
##      0.0000000            NaN